# -*- coding: utf-8 -*-
import scrapy
from xinlang.items import XinlangItem


class CatalogueSpider(scrapy.Spider):
    """Spider that scrapes the Sina news channel catalogue page.

    Crawls http://news.sina.com.cn/guide/ and emits one ``XinlangItem``
    per channel link, carrying the link text (``title``) and its URL
    (``href``). Downstream pipeline stores items in the ``catalogue``
    table of the ``xinlang`` database.
    """

    name = 'catalogue'
    allowed_domains = ['news.sina.com.cn']
    start_urls = ['http://news.sina.com.cn/guide/']

    def parse(self, response):
        """Extract every category link from the guide page.

        :param response: the downloaded guide page; ``response.xpath``
            is shorthand for ``response.selector.xpath``.
        :yields: ``XinlangItem`` with ``title`` and ``href`` fields.
        """
        # Each category section on the page is a <div class="clearfix">.
        sections = response.xpath("//div[@class='clearfix']")
        for section in sections:
            # Every sub-category is an <a> inside the section's <ul><li>.
            for link in section.css("ul li a"):
                item = XinlangItem()
                item["title"] = link.css("::text").extract_first()
                item["href"] = link.css("::attr(href)").extract_first()
                # Log at debug level instead of print(); yield so the
                # item actually reaches the configured item pipelines
                # (previously the yield was commented out and items
                # were silently dropped).
                self.logger.debug("scraped catalogue item: %r", item)
                yield item
